##------------------------------------------------------------------#
# Business & Politics
# Event Study Main analysis
# Author: Jeff Allen (jallen71@jhu.edu)
# Last updated: December 5, 2020
#
# Table of Contents
# 1) Import and formatting
# 2) Cross-sectional correlation
# 3) Implement nonparametric Generalized Rank (GRANK) Test
# 4) Implement parametric Adjusted BMP Test
# 5) Summarize event study results
# 6) Firm-level cross-sectional analysis
#
# Note: the market model, abnormal returns, and aspects of the BMP
# statistic are generated in the corresponding file "es-data-prep."
##------------------------------------------------------------------#

library(reshape2)
library(ggplot2)
library(stargazer)
library(Hmisc)
library(sandwich)
library(lmtest)

## SET WORKING DIRECTORY

## Turn off scientific notation
options(scipen = 999)

##------------------------------------------------------------------#
## 1) Import and formatting
##------------------------------------------------------------------#

## Import data
firms.df <- read.csv("es_data.csv", stringsAsFactors = FALSE)

## Check structures
str(firms.df)

## Store number of firms
n <- length(unique(firms.df$firm))

## Identify window length (+/- 5 days)
wind.l <- 5

##-- Create estimation and event data frames --##

## Identify window
est.i <- which(firms.df$time < (wind.l * -1))
event.i <- which(firms.df$time > ((wind.l + 1) * -1))

## Extract days
estimation.df <- firms.df[est.i,]
event.df <- firms.df[event.i,]

## Clean up
rm(est.i, event.i)

#------------------------------------------------------------------#
# 2) Cross-sectional correlation
#------------------------------------------------------------------#

# The cross-section correlation (r.bar) will be used to implement
# both statistical tests: Adjusted BMP and GRANK found in Kolari
# and Pynnonen (KP) (2010, 2011)

## Create wide DFs for each event
ar.z.w <- dcast(time ~ firm, value.var = "ar",
                data = estimation.df[estimation.df$event == "ZTE",])

ar.h.w <- dcast(time ~ firm, value.var = "ar",
                data = estimation.df[estimation.df$event == "Huawei",])

ar.s.w <- dcast(time ~ firm, value.var = "ar",
                data = estimation.df[estimation.df$event == "Surveillance",])

## Event block correlation matrices
z.corr <- rcorr(as.matrix(ar.z.w[-1]), type = "pearson")
h.corr <- rcorr(as.matrix(ar.h.w[-1]), type = "pearson")
s.corr <- rcorr(as.matrix(ar.s.w[-1]), type = "pearson")

## Bind event bloc pairwise correlations
corr.summary <- c(
  z.corr$r[upper.tri(z.corr$r)],
  h.corr$r[upper.tri(h.corr$r)],
  s.corr$r[upper.tri(s.corr$r)]
)

## Store Rbar and Scaling factor (KP 2010, 4003)
r.bar <- mean(corr.summary)
kp.scale <- sqrt((1 - r.bar) / (1 + (n - 1)*r.bar))

## Clean up
rm(ar.h.w, ar.s.w, ar.z.w, corr.summary, h.corr, s.corr, z.corr)

#------------------------------------------------------------------#
# 3) Implement GRANK (KP 2011)
#------------------------------------------------------------------#

## Source: Kolari and Pynnonen (KP). 2011. Nonparametric rank tests
## for event studies. Journal of empirical finance 18(2011) 953-971.

## Standard deviation of scaled residuals in event window
sr.sd <- aggregate(sr ~ time, data = event.df, sd)
colnames(sr.sd)[2] <- "sd"

## Correlation corrected standard deviation (sa)
# KP (2010, 4003); KP (2011, eq. 6, pp. 955, footnote 6)
sr.sd$sa <- sqrt(sr.sd$sd ^ 2 / (1 - r.bar))

## Merge to create gsar.event
gsar.event <- merge(event.df, sr.sd, all.x = TRUE)

## Create GSAR for event period 
# Rescale using correlation corrected standard deviation (sa)
# KP (2011, eq. 8, pg. 955, footnote 6)
gsar.event$gsar <- gsar.event$sr / gsar.event$sa

## Reduce columns in gsar event
gsar.event <- gsar.event[,c("time", "firm", "gsar")]

## Create estimation period gsar data frame
# In the estimation period, sr = gsar (eq. 8, pg. 955)
# Therefore, rename sr as gsar
gsar.est <- estimation.df[,c("time", "firm", "sr")]
colnames(gsar.est)[3] <- "gsar"

## Bind estimation period and event window
gsar.df <- rbind(gsar.est, gsar.event)
rm(gsar.est, gsar.event, sr.sd)

## Convert GSAR to wide
gsar.df.w <- dcast(gsar.df, time ~ firm, value.var = "gsar")

## Create DFs that append each day in event window to estimation period,
# and store in a list
# Why? KP (2011) only use 1 event window day at a time

## Create empty list
rank.list <- vector(mode = "list", length = 11)

## Create event window vector
e.w <- -5:5

## Loop over wide gsar DF appending one EW day to estimation period
for(i in 1:length(rank.list)){
  rank.list[[i]] <- 
    gsar.df.w[gsar.df.w$time < -wind.l | gsar.df.w$time == e.w[i],]
}

## Identify T (KP 2011, 956, eq. 9)
T <- nrow(rank.list[[1]])

## Set up DF for storage in next step
grank.df <- data.frame(
  time = -5:5,
  S.ubar = NA,
  Ubar.0 = NA
)

## Loop over rank.list implementing equations 9 and 12-15 (KP 2011, 956)
for(i in 1:length(rank.list)){
  # Rank operation (numerator, eq. 9, RHS)
  rank.list[[i]][-1] <- lapply(rank.list[[i]][-1], rank) 
  # U.it (eq. 9, LHS)
  rank.list[[i]][-1] <- lapply(rank.list[[i]][-1], 
                               function(x) x / (T + 1) - 0.5)
  # Ubar.t (eq. 15)
  rank.list[[i]]$Ubar.t <- rowMeans(rank.list[[i]][-1])
  # Ubar2.t (eq. 14, aspect of RHS)
  rank.list[[i]]$Ubar2.t <- rank.list[[i]]$Ubar.t^2
  # S.ubar (eq. 14, LHS); store in grank.df
  grank.df[i,2] <- sqrt(sum(rank.list[[i]]$Ubar2.t) / T)
  # Extract Ubar.0 from rank.list (numerator, eq. 13, RHS)
  grank.df[i,3] <- rank.list[[i]][nrow(rank.list[[i]]),]$Ubar.t
  # Z (eq. 13, LHS)
  grank.df$Z <- grank.df$Ubar.0 / grank.df$S.ubar
  # factor (eq. 12, aspect of RHS)
  grank.df$factor <- ((T - 2) / (T - 1 - grank.df$Z^2))^0.5
  # t.grank (eq. 12, LHS)
  grank.df$t.grank <- grank.df$Z * grank.df$factor
}

## Clean up
rm(gsar.df, gsar.df.w, rank.list, i, T, e.w)

#------------------------------------------------------------------#
# 4) Implement Adjusted BMP (KP 2010)
#------------------------------------------------------------------#

## Source: Kolari and Pynnonen (KP). 2010. Event Study Testing with
## Cross-sectional Correlation of Abnormal Returns. The Review of
## Financial Studies, Vol. 23, No. 11 (November), pp. 3996-4025.

##-- Aggregate AR and SR --##

## Mean
ar.mean <- aggregate(ar ~ time, data = event.df, mean)
sr.mean <- aggregate(sr ~ time, data = event.df, mean)

## SD
ar.sd <- aggregate(ar ~ time, data = event.df, sd)
sr.sd <- aggregate(sr ~ time, data = event.df, sd)

##-- Rename and Merge --##

## Mean
colnames(ar.mean)[2] <- "aar" # Average abnormal return
colnames(sr.mean)[2] <- "asar" # Average scaled abnormal return

## SD
colnames(ar.sd)[2] <- "sd"
colnames(sr.sd)[2] <- "sd"

## Merge
aar.df <- merge(ar.mean, ar.sd)
asar.df <- merge(sr.mean, sr.sd)

##-- Generate Statistics --##

## Standard error
asar.df$se <- asar.df$sd / sqrt(n)

## T-stats
# Note, earlier steps were take to develop aspects of these stats

# BMP_stat (BMP (1991, 269, eq. 1) or KP (2010, 4002, eq. 6))
asar.df$BMP_stat <- asar.df$asar / asar.df$se

# Adjusted BMP (KP 2010, 4003)
asar.df$Adj_BMP <- asar.df$BMP_stat * kp.scale

## Clean up
rm(ar.mean, sr.mean, ar.sd, sr.sd, estimation.df)

#------------------------------------------------------------------#
# 5) Summarize Event Study Results (Table 2 and Figure 1)
#------------------------------------------------------------------#

## Develop p-values for Adj_BMP and GRANK
asar.df$Adj_BMP_p <- 2*pt(-abs(asar.df$Adj_BMP),df=n-1)
grank.df$GRANK_p <- 2*pt(-abs(grank.df$t.grank),df=n-1)

## Set up data summary data frame with AAR
summary.df <- aar.df[1:2]

## Combine DFs
summary.df <- cbind(summary.df, asar.df[ncol(asar.df)])
summary.df <- cbind(summary.df, grank.df[ncol(grank.df)])

## Significance codes
summary.df[paste0(names(summary.df)[-(1:2)],".sig")] <- 
  lapply(summary.df[-(1:2)],
         FUN = function(x) 
           with(summary.df, 
                ifelse(x < .01, "***", 
                       ifelse(x < .05, "**", 
                              ifelse(x < .1, "*", "")))))

## Column names
aar.names <- c("t", "AAR", "Adjusted BMP", "GRANK")
colnames(summary.df)[c(1:2, 5:6)] <- aar.names

## Create Table 2
table.2 <- summary.df[c(1:2, 5:6)]

## Export Table 2
write.csv(table.2, "table_2.csv", row.names = FALSE)

## Figure 1
ggplot(aar.df, aes(x = time)) +
  geom_line(aes(y = aar)) +
  geom_hline(yintercept = 0) +
  scale_x_continuous(breaks = -5:5) +
  ylab("Average Abnormal Return") + 
  xlab("Event Time") +
  theme_bw()

#------------------------------------------------------------------#
# 6) Firm-level cross-sectional analysis
#------------------------------------------------------------------#

## Import firm-level data
fl.df <- read.csv("firm_data.csv", stringsAsFactors = FALSE)

## Create log vars for easier analysis
fl.df$ln_mkv <- log(fl.df$mkv)
fl.df$ln_de <- log(fl.df$de_ratio)
fl.df$ln_rd <- log(fl.df$rd_sale + 1)
fl.df$ln_ptb <- log(fl.df$ptb)
fl.df$ln_rev_dep <- log(fl.df$rev_dep)

## Regression
lm.1 <- lm(ar ~ ln_rev_dep +
             ln_mkv + 
             roa + 
             ln_de + 
             ln_rd +
             ln_ptb, 
           data = fl.df)

summary(lm.1)

## Clustered Standard Errors
lm.1.cse <- vcovCL(lm.1, cluster = ~ event)

## Store cse
cse.1 <- sqrt(diag(lm.1.cse))

## Summarize
(model.1 <- coeftest(lm.1, vcov = lm.1.cse))

## Store var names
var.names <-
  c("Dependence", 
    "Market Value",
    "Return on Assets", 
    "Debt to Equity",
    "R&D (% of sales)", 
    "Price to Book")

## Export regression table
stargazer(lm.1, 
          se = list(cse.1),
          out = "table_4.html",
          no.space = TRUE,
          covariate.labels = var.names,
          align = TRUE)

## Clean up
rm(list=ls())
